Importing the libraries¶

# NumPy -> To perform the Mathematical operations
import numpy as np

# Pandas -> Data Manipulation tool
import pandas as pd

# Matplotlib -> Data Visualisation tool
import matplotlib.pyplot as plt

# Seaborn -> Data Visualisation tool
import seaborn as sns

# SQLite -> Server-less Database
import sqlite3

Hints & Tips¶

Establishing the connection to the database

db = '/content/drive/MyDrive/Datasets/movies.sqlite'
conn = sqlite3.connect(db)
cur = conn.cursor()

Get all the data about movies

# Fetching the data of the Movies table from the Database
cur.execute("SELECT * FROM movies")
# Creating the cursor object
movies = cur.fetchall()

# Creating a DataFrame
movies = pd.DataFrame(movies,columns=['id', 'original_title', 'budget', 'popularity',
                                  'release_date', 'revenue', 'title', 'vote_average',
                                  'vote_count', 'overview', 'tagline','uid',
                                  'director_id'] )

# Displaying the DataFrame
movies

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4773 entries, 0 to 4772
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              4773 non-null   int64  
 1   Original_title  4773 non-null   object 
 2   Budget          4773 non-null   int64  
 3   Popularity      4773 non-null   int64  
 4   Release_date    4773 non-null   object 
 5   Revenue         4773 non-null   int64  
 6   Title           4773 non-null   object 
 7   Avg_ratings     4773 non-null   float64
 8   Rating_count    4773 non-null   int64  
 9   Overview        4770 non-null   object 
 10  Tagline         3951 non-null   object 
 11  UID             4773 non-null   int64  
 12  director_id     4773 non-null   int64  
dtypes: float64(1), int64(7), object(5)
memory usage: 484.9+ KB

Get all the data about directors

# Fetching the data of the Director table from the Database
cur.execute("SELECT * FROM directors")
# Creating the cursor object
directors = cur.fetchall()

directors

directors = pd.DataFrame(directors,columns=['name', 'id', 'gender', 'uid',
                                  'department'] )

directors

directors.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2349 entries, 0 to 2348
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        2349 non-null   object
 1   id          2349 non-null   int64 
 2   gender      2349 non-null   int64 
 3   uid         2349 non-null   int64 
 4   department  2349 non-null   object
dtypes: int64(3), object(2)
memory usage: 91.9+ KB

Check how many movies are present in the IMDB table

cur.execute('SELECT COUNT(Title) FROM movies')
count = cur.fetchall()
print(f"The number of movies present in the IMDB databse is {count[0]}")

The number of movies present in the IMDB databse is (4773,)

Find these 3 directors: James Cameron, Luc Besson, John Woo

cur.execute("SELECT * FROM directors WHERE name IN ('James Cameron','Luc Besson', 'John Woo')")
cur.execute("SELECT * FROM directors WHERE name =='James Cameron' or name =='Luc Besson' or name == 'John Woo'")
three_directors = cur.fetchall()
print(f"Theses 3 Directors data are: {three_directors}")

Theses 3 Directors data are: [('James Cameron', 4762, 2, 2710, 'Directing'), ('John Woo', 4893, 2, 11401, 'Directing'), ('Luc Besson', 4949, 2, 59, 'Directing')]

Find all the directors with name starting with 'Steven'

cur.execute('SELECT * FROM directors WHERE name LIKE "Steven%"')
name_like = cur.fetchall()
print(f"The directors whose names are starting with the word 'Steven' are: {name_like}")

The directors whose names are starting with the word 'Steven' are: [('Steven Spielberg', 4799, 2, 488, 'Directing'), ('Steven Soderbergh', 4909, 2, 1884, 'Directing'), ('Steven Brill', 5013, 2, 32593, 'Directing'), ('Steven Zaillian', 5117, 2, 2260, 'Directing'), ('Steven Quale', 5216, 2, 93214, 'Directing'), ('Steven Seagal', 5221, 2, 23880, 'Directing'), ('Steven E. de Souza', 5390, 2, 1726, 'Directing'), ('Steven Shainberg', 5803, 2, 67795, 'Directing'), ('Steven R. Monroe', 6713, 2, 88039, 'Directing')]

Count the Female directors

cur.execute("SELECT COUNT(*) FROM directors WHERE gender == '1'")
females = cur.fetchall()
print(f"The number of female directors is {females[0][0]}")

The number of female directors is 150

Find the name of the 10th first women directors

cur.execute('SELECT name FROM directors WHERE gender==1')
tenth = cur.fetchall()
print(f"The tenth first women is {tenth[9][0]}")

The tenth first women is Angelina Jolie

What are the 3 most popular movies?

cur.execute('SELECT title FROM movies ORDER BY popularity DESC LIMIT 3 ')
most_popular = cur.fetchall()
print(f"The 3 mostpopular movies are: {most_popular[0][0]}, {most_popular[1][0]} and {most_popular[2][0]}")

The 3 mostpopular movies are: Minions, Interstellar and Deadpool

What are the 3 most bankable movies?

cur.execute('SELECT title FROM movies ORDER BY budget DESC LIMIT 3')
most_bankable = cur.fetchall()
most_bankable
print(f"The three most bankable movies are {most_bankable[0][0]}, {most_bankable[1][0]} and {most_bankable[2][0]}")

The three most bankable movies are Pirates of the Caribbean: On Stranger Tides, Pirates of the Caribbean: At World's End and Avengers: Age of Ultron

What is the most awarded average rated movie since the Jan 1st, 2000?

cur.execute("SELECT Original_title FROM movies WHERE Release_date	 > '2000-01-01' ORDER BY vote_average DESC LIMIT 1")
most_awarded_avg = cur.fetchall()
print(f"The most awarded average rated movie is {most_awarded_avg[0][0]}")

The most awarded average rated movie is Sardaarji

Which movie(s) were directed by Brenda Chapman?

cur.execute("SELECT original_title FROM movies JOIN directors ON directors.id = movies.director_id WHERE directors.name = 'Brenda Chapman'")
directed_by = cur.fetchall()
print(f"The movie(s) directed by Brenda Chapman is {directed_by[0][0]}")

The movie(s) directed by Brenda Chapman is Brave

Name the director who has made the most movies?

cur.execute("SELECT name FROM directors JOIN movies ON directors.id = movies.director_id GROUP BY director_id ORDER BY COUNT(name) DESC limit 1")
director_movie = cur.fetchall()
print(f"The director who made the most movies is {director_movie[0][0]}")

The director who made the most movies is Steven Spielberg

Name of the director who is most bankable

cur.execute("SELECT name FROM directors JOIN movies ON directors.id = movies.director_id GROUP BY director_id ORDER BY SUM(budget) DESC limit 1")
most_bankable = cur.fetchall()
print(f'The most bankable director is {most_bankable[0][0]}')

The most bankable director is Steven Spielberg

Budget Analysis¶

Tell the Top 10 highest budget making movie

movies.columns

Index(['id', 'original_title', 'budget', 'popularity', 'release_date',
       'revenue', 'title', 'vote_average', 'vote_count', 'overview', 'tagline',
       'uid', 'director_id'],
      dtype='object')

cur.execute('Select * FROM movies ORDER BY budget DESC LIMIT 10')
top_10 = cur.fetchall()
most_popular = pd.DataFrame(top_10, columns=['id', 'original_title', 'budget', 'popularity', 'release_date',
       'revenue', 'title', 'vote_average', 'vote_count', 'overview', 'tagline',
       'uid', 'director_id'])
most_popular

Revenue Analysis¶

Find Top 10 Revenue making movies

cur.execute("SELECT * FROM movies ORDER BY revenue DESC LIMIT 10")
top10_movies = cur.fetchall()
most_revenue = pd.DataFrame(top10_movies,  columns= ['id','original_title','budget','popularity','release_date',
                                    'revenue', 'title','vote_average','vote_count','overview',
                                    'tagline','uid','director_id'])
most_revenue

Voting Analysis¶

Find the most popular movies with highest vote_average

cur.execute(("SELECT * FROM movies ORDER BY vote_average DESC LIMIT 10"))
most_pop = cur.fetchall() 
most_popular_movie = pd.DataFrame(most_pop,columns =['id', 'original_title', 'budget', 'popularity', 'release_date',
       'revenue', 'title', 'vote_average', 'vote_count', 'overview', 'tagline',
       'uid', 'director_id'])
most_popular_movie

Director Analysis¶

Name all the directors with the number of movies and revenue where Revenue should be taken into account for doing the analysis. The director who has the highest revenue should comes at the top and so on and so forth.

cur.execute("SELECT name,COUNT(*) AS 'Total Movies',SUM(revenue) AS 'Total Revenue' FROM  directors JOIN movies WHERE directors.id==movies.director_id GROUP BY director_id ORDER BY SUM(revenue) DESC")
director_revenue=cur.fetchall()
director_most_revenue=pd.DataFrame(director_revenue,columns=['Director_Name','Total Movies','Total Revenue'])
director_most_revenue.head(10)

Name all the directors with the number of movies and revenue where number of movies should be taken into account for doing the analysis. The director who has the highest number of movies should comes at the top and so on and so forth.

cur.execute("SELECT name, COUNT(title), SUM(revenue) FROM directors JOIN movies ON movies.director_id = directors.id GROUP by director_id ORDER BY  COUNT(title) DESC LIMIT 10")
director_movies = cur.fetchall()
director_most_movies = pd.DataFrame(director_movies,columns=['name','no_of_title','revenue'])
director_most_movies

Give the Title of the movie, realease_date, budget, revenue, popularity and vote_average made by Steven Spielberg

cur.execute("SELECT title, release_date,budget,revenue,popularity,vote_average FROM directors JOIN movies ON directors.id==movies.director_id WHERE directors.name=='Steven Spielberg'")
movies_list=cur.fetchall()
movies_list_Steven_Spielberg=pd.DataFrame(movies_list,columns=['Movie_Name','Release_Date','Total Budget','Total_Revenue','Popularity','Vote_Average'])
movies_list_Steven_Spielberg

	id	Original_title	Budget	Popularity	Release_date	Revenue	Title	Avg_ratings	Rating_count	Overview	Tagline	uid	director_id
0	43597	Avatar	237000000	150	2009-12-10	2787965087	Avatar	7.2	11800	In the 22nd century, a paraplegic Marine is di...	Enter the World of Pandora.	NaN	4762
1	43598	Pirates of the Caribbean: At World's End	300000000	139	2007-05-19	961000000	Pirates of the Caribbean: At World's End	6.9	4500	Captain Barbossa, long believed to be dead, ha...	At the end of the world, the adventure begins.	NaN	4763
2	43599	Spectre	245000000	107	2015-10-26	880674609	Spectre	6.3	4466	A cryptic message from Bond’s past sends him o...	A Plan No One Escapes	NaN	4764
3	43600	The Dark Knight Rises	250000000	112	2012-07-16	1084939099	The Dark Knight Rises	7.6	9106	Following the death of District Attorney Harve...	The Legend Ends	NaN	4765
4	43601	John Carter	260000000	43	2012-03-07	284139100	John Carter	6.1	2124	John Carter is a war-weary, former military ca...	Lost in our world, found in another.	NaN	4766
...	...	...	...	...	...	...	...	...	...	...	...	...	...
4768	48395	El Mariachi	220000	14	1992-09-04	2040920	El Mariachi	6.6	238	El Mariachi just wants to play his guitar and ...	He didn't come looking for trouble, but troubl...	NaN	5097
4769	48396	Newlyweds	9000	0	2011-12-26	0	Newlyweds	5.9	5	A newlywed couple's honeymoon is upended by th...	A newlywed couple's honeymoon is upended by th...	NaN	6485
4770	48397	Signed, Sealed, Delivered	0	1	2013-10-13	0	Signed, Sealed, Delivered	7.0	6	"Signed, Sealed, Delivered" introduces a dedic...	None	NaN	7108
4771	48398	Shanghai Calling	0	0	2012-05-03	0	Shanghai Calling	5.7	7	When ambitious New York attorney Sam is sent t...	A New Yorker in Shanghai	NaN	7109
4772	48399	My Date with Drew	0	1	2005-08-05	0	My Date with Drew	6.3	16	Ever since the second grade when he first saw ...	None	NaN	7110

	name	id	gender	uid	department
0	James Cameron	4762	2	2710	Directing
1	Gore Verbinski	4763	2	1704	Directing
2	Sam Mendes	4764	2	39	Directing
3	Christopher Nolan	4765	2	525	Directing
4	Andrew Stanton	4766	2	7	Directing
...	...	...	...	...	...
2344	Shane Carruth	7106	2	76624	Directing
2345	Neill Dela Llana	7107	0	1174437	Directing
2346	Scott Smith	7108	0	1219158	Directing
2347	Daniel Hsia	7109	2	208138	Directing
2348	Brian Herzlinger	7110	2	85563	Directing

	id	original_title	budget	popularity	release_date	revenue	title	vote_average	vote_count	overview	tagline	uid	director_id
0	43614	Pirates of the Caribbean: On Stranger Tides	380000000	135	2011-05-14	1045713802	Pirates of the Caribbean: On Stranger Tides	6.4	4948	Captain Jack Sparrow crosses paths with a woma...	Live Forever Or Die Trying.	1865	4775
1	43598	Pirates of the Caribbean: At World's End	300000000	139	2007-05-19	961000000	Pirates of the Caribbean: At World's End	6.9	4500	Captain Barbossa, long believed to be dead, ha...	At the end of the world, the adventure begins.	285	4763
2	43604	Avengers: Age of Ultron	280000000	134	2015-04-22	1405403694	Avengers: Age of Ultron	7.3	6767	When Tony Stark tries to jumpstart a dormant p...	A New Age Has Come.	99861	4769
3	43607	Superman Returns	270000000	57	2006-06-28	391081192	Superman Returns	5.4	1400	Superman returns to discover his 5-year absenc...	None	1452	4772
4	43601	John Carter	260000000	43	2012-03-07	284139100	John Carter	6.1	2124	John Carter is a war-weary, former military ca...	Lost in our world, found in another.	49529	4766
5	43603	Tangled	260000000	48	2010-11-24	591794936	Tangled	7.4	3330	When the kingdom's most wanted-and most charmi...	They're taking adventure to new lengths.	38757	4768
6	43602	Spider-Man 3	258000000	115	2007-05-01	890871626	Spider-Man 3	5.9	3576	The seemingly invincible Spider-Man goes up ag...	The battle within.	559	4767
7	43610	The Lone Ranger	255000000	49	2013-07-03	89289910	The Lone Ranger	5.9	2311	The Texas Rangers chase down a gang of outlaws...	Never Take Off the Mask	57201	4763
8	43600	The Dark Knight Rises	250000000	112	2012-07-16	1084939099	The Dark Knight Rises	7.6	9106	Following the death of District Attorney Harve...	The Legend Ends	49026	4765
9	43605	Harry Potter and the Half-Blood Prince	250000000	98	2009-07-07	933959197	Harry Potter and the Half-Blood Prince	7.4	5293	As Harry begins his sixth year at Hogwarts, he...	Dark Secrets Revealed	767	4770

	id	original_title	budget	popularity	release_date	revenue	title	vote_average	vote_count	overview	tagline	uid	director_id
0	43597	Avatar	237000000	150	2009-12-10	2787965087	Avatar	7.2	11800	In the 22nd century, a paraplegic Marine is di...	Enter the World of Pandora.	19995	4762
1	43622	Titanic	200000000	100	1997-11-18	1845034188	Titanic	7.5	7562	84 years later, a 101-year-old woman named Ros...	Nothing on Earth could come between them.	597	4762
2	43613	The Avengers	220000000	144	2012-04-25	1519557910	The Avengers	7.4	11776	When an unexpected enemy emerges and threatens...	Some assembly required.	24428	4769
3	43625	Jurassic World	150000000	418	2015-06-09	1513528810	Jurassic World	6.5	8662	Twenty-two years after the events of Jurassic ...	The park is open.	135397	4783
4	43641	Furious 7	190000000	102	2015-04-01	1506249360	Furious 7	7.3	4176	Deckard Shaw seeks revenge against Dominic Tor...	Vengeance Hits Home	168259	4794
5	43604	Avengers: Age of Ultron	280000000	134	2015-04-22	1405403694	Avengers: Age of Ultron	7.3	6767	When Tony Stark tries to jumpstart a dormant p...	A New Age Has Come.	99861	4769
6	43721	Frozen	150000000	165	2013-11-27	1274219009	Frozen	7.3	5295	Young princess Anna of Arendelle dreams about ...	Only the act of true love will thaw a frozen h...	109445	4844
7	43628	Iron Man 3	200000000	77	2013-04-18	1215439994	Iron Man 3	6.8	8806	When Tony Stark's world is torn apart by a for...	Unleash the power behind the armor.	68721	4784
8	44143	Minions	74000000	875	2015-06-17	1156730962	Minions	6.4	4571	Minions Stuart, Kevin and Bob are recruited by...	Before Gru, they had a history of bad bosses	211672	5045
9	43623	Captain America: Civil War	250000000	198	2016-04-27	1153304495	Captain America: Civil War	7.1	7241	Following the events of Age of Ultron, the col...	Divided We Fall	271110	4781

	id	original_title	budget	popularity	release_date	revenue	title	vote_average	vote_count	overview	tagline	uid	director_id
0	47116	Stiff Upper Lips	0	0	1998-06-12	0	Stiff Upper Lips	10.0	1	Stiff Upper Lips is a broad parody of British ...	None	89861	6332
1	47642	Dancer, Texas Pop. 81	0	0	1998-05-01	565592	Dancer, Texas Pop. 81	10.0	1	Four guys, best friends, have grown up togethe...	in the middle of nowhere they had everything	78373	5463
2	47589	Sardaarji	0	0	2015-06-26	0	Sardaarji	9.5	2	A ghost hunter uses bottles to capture trouble...	None	346081	6588
3	45983	One Man's Hero	0	0	1999-08-02	0	One Man's Hero	9.3	2	One Man's Hero tells the little-known story of...	One man's hero is another man's traitor.	69848	5770
4	45478	The Shawshank Redemption	25000000	136	1994-09-23	28341469	The Shawshank Redemption	8.5	8205	Framed in the 1940s for the double murder of h...	Fear can hold you prisoner. Hope can set you f...	278	5051
5	46567	There Goes My Baby	10500000	0	1994-09-02	123509	There Goes My Baby	8.5	2	A group of high school seniors meets in the su...	None	88641	6051
6	46393	The Prisoner of Zenda	0	4	1937-09-03	0	The Prisoner of Zenda	8.4	11	An Englishman on a Ruritarian holiday must imp...	The most thrilling swordfight ever filmed...	43867	5972
7	46934	The Godfather	6000000	143	1972-03-14	245066411	The Godfather	8.4	5893	Spanning the years 1945 to 1955, a chronicle o...	An offer you can't refuse.	238	5179
8	44259	Fight Club	63000000	146	1999-10-15	100853753	Fight Club	8.3	9413	A ticking-time-bomb insomniac and a slippery s...	Mischief. Mayhem. Soap.	550	4829
9	45415	Schindler's List	22000000	104	1993-11-29	321365567	Schindler's List	8.3	4329	The true story of how businessman Oskar Schind...	Whoever saves one life, saves the world entire.	424	4799

	Director_Name	Total Movies	Total Revenue
0	Steven Spielberg	27	9147393164
1	Peter Jackson	9	6498642820
2	James Cameron	7	5883569439
3	Michael Bay	12	5832524638
4	Christopher Nolan	8	4227483234
5	Chris Columbus	11	3725631503
6	Robert Zemeckis	13	3590622002
7	George Lucas	5	3339113893
8	Tim Burton	14	3337418241
9	Ridley Scott	16	3189557997

	name	no_of_title	revenue
0	Steven Spielberg	27	9147393164
1	Woody Allen	21	669101038
2	Martin Scorsese	20	1956635998
3	Clint Eastwood	20	2512058888
4	Ridley Scott	16	3189557997
5	Robert Rodriguez	16	1109899581
6	Spike Lee	16	340618771
7	Steven Soderbergh	15	2114864443
8	Renny Harlin	15	1031500590
9	Tim Burton	14	3337418241

	Movie_Name	Release_Date	Total Budget	Total_Revenue	Popularity	Vote_Average
0	Indiana Jones and the Kingdom of the Crystal S...	2008-05-21	185000000	786636033	75	5.7
1	The BFG	2016-06-01	140000000	183345589	44	6.0
2	War of the Worlds	2005-06-28	132000000	591739379	48	6.2
3	The Adventures of Tintin	2011-10-25	130000000	371940071	89	6.7
4	Minority Report	2002-06-20	102000000	358372926	65	7.1
5	A.I. Artificial Intelligence	2001-06-29	100000000	235926552	34	6.8
6	The Lost World: Jurassic Park	1997-05-23	73000000	229074524	2	6.2
7	The Terminal	2004-06-17	60000000	219417255	57	7.0
8	Munich	2005-12-22	70000000	130358911	29	6.9
9	Hook	1991-12-11	70000000	300854823	33	6.6
10	War Horse	2011-12-25	66000000	177584879	29	7.0
11	Saving Private Ryan	1998-07-24	70000000	481840909	76	7.9
12	Lincoln	2012-11-09	65000000	275293450	36	6.7
13	Jurassic Park	1993-06-11	63000000	920100000	40	7.6
14	Catch Me If You Can	2002-12-25	52000000	352114312	73	7.7
15	Indiana Jones and the Last Crusade	1989-05-24	48000000	474171806	80	7.6
16	Bridge of Spies	2015-10-15	40000000	165478348	48	7.2
17	Amistad	1997-12-03	36000000	74000000	3	6.8
18	1941	1979-12-13	35000000	31755742	10	5.6
19	Indiana Jones and the Temple of Doom	1984-05-23	28000000	333000000	66	7.1
20	Schindler's List	1993-11-29	22000000	321365567	104	8.3
21	Raiders of the Lost Ark	1981-06-12	18000000	389925971	68	7.7
22	Close Encounters of the Third Kind	1977-11-16	20000000	303788635	52	7.2
23	The Color Purple	1985-12-18	15000000	146292009	17	7.7
24	Jaws	1975-06-18	7000000	470654000	50	7.5
25	E.T. the Extra-Terrestrial	1982-04-03	10500000	792910554	56	7.3
26	Twilight Zone: The Movie	1983-06-24	10000000	29450919	12	6.2